@comment{
  Conference paper (SIBGRAPI 2021) on form understanding for printed,
  structured engineering documents.
  Notes for maintainers:
  - url holds the DOI resolver link; the repository (IBI) link is stored in
    the non-standard ibi-url field so that url is defined only once.
  - Percent signs in the abstract are escaped, since an unescaped one would
    start a TeX comment if a bibliography style prints the field.
  - Hyphenated and other non-standard fields (affiliation, conference-*,
    ibi, ibi-url, targetfile, urlaccessdate) are ignored by standard styles.
}
@InProceedings{SantosSiDaRoDrDu:2021:FoUnAp,
  author              = {Santos, Gabriel Lavoura dos and Silva, Vanessa Telles da and
                         Dalmolin, Laura de Aguiar and Rodrigues, Ricardo Nagel and Drews
                         Jr, Paulo Lilles Jorge and Duarte Filho, Nelson Lopes},
  affiliation         = {Universidade Federal do Rio Grande, Brazil and Universidade
                         Federal do Rio Grande, Brazil and Universidade Federal do Rio
                         Grande, Brazil and Universidade Federal do Rio Grande, Brazil
                         and Universidade Federal do Rio Grande, Brazil and Universidade
                         Federal do Rio Grande, Brazil},
  title               = {A Form Understanding Approach to Printed and Structured
                         Engineering Documentation},
  booktitle           = {Proceedings...},
  year                = {2021},
  editor              = {Paiva, Afonso and Menotti, David and Baranoski, Gladimir V. G. and
                         Proen{\c{c}}a, Hugo Pedro and Junior, Antonio Lopes Apolinario
                         and Papa, Jo{\~a}o Paulo and Pagliosa, Paulo and dos Santos,
                         Thiago Oliveira and e S{\'a}, Asla Medeiros and da Silveira,
                         Thiago Lopes Trugillo and Brazil, Emilio Vital and Ponti, Moacir
                         A. and Fernandes, Leandro A. F. and Avila, Sandra},
  organization        = {Conference on Graphics, Patterns and Images, 34. (SIBGRAPI)},
  publisher           = {IEEE Computer Society},
  address             = {Los Alamitos},
  keywords            = {form understanding, text detection, spatial layout analysis},
  abstract            = {A significant amount of companies still depends on printed
                         documents, such as healthcare reports, engineering specifications,
                         or historical documents. Those documents are diverse in terms of
                         layout and content, thereby it requires different approaches for
                         each document structure, which makes information extraction a
                         costly and inefficient task. We classify documents into three
                         categories, non-structured, semi-structured, and structured
                         documents. The last one being the focus of the present work. We
                         propose a pattern recognition method for structured documents with
                         an anchoring relationship between question-answer objects through
                         a system of hypotheses and a probability distribution in order to
                         identify which predefined model the document belongs to.
                         Therefore, acting as a system for both identification and content
                         extraction to structured documents. The method has promising
                         results for pattern recognition from all document models, with 78\%
                         to 97\% objects extracted correctly.},
  conference-location = {Gramado, RS, Brazil (virtual)},
  conference-year     = {18-22 Oct. 2021},
  doi                 = {10.1109/SIBGRAPI54419.2021.00052},
  url                 = {https://doi.org/10.1109/SIBGRAPI54419.2021.00052},
  language            = {en},
  ibi                 = {8JMKD3MGPEW34M/45CKPKE},
  ibi-url             = {http://urlib.net/ibi/8JMKD3MGPEW34M/45CKPKE},
  targetfile          = {Sibgrapi_2021 - Paper ID 64.pdf},
  urlaccessdate       = {2024, May 06}
}